rm(list = ls())
library(data.table)

# ---- Data preparation -------------------------------------------------------
# Load the saved workspace; the code below expects it to provide a data.table
# named `dta.w`.
load("MiamiDade7658Weekends.RData")

# Drop rows with an unnamed judge, an unknown defendant name, or a placeholder
# arrest date.
dta.w <- dta.w[judge_cat != "UNNAMED", ]
dta.w <- dta.w[!(last_name == "UNKNOWN" | first_name == "UNKNOWN"), ]
dta.w <- dta.w[arrest_date != "00000000"]

# Harmonise the two spellings of the same judge BEFORE any per-judge counting.
# Doing this after the counts (as the script used to) splits this judge's
# cases across two labels, which understates count_cases / count_cases_2 and
# distorts the percentile filter applied in the yearly summaries.
dta.w[judge_cat == "MIRIAM LEHR", judge_cat := "MYRIAM LEHR"]

# Week of the year as a two-digit string. "%V" is the ISO-8601 week number.
# NOTE(review): ISO weeks can belong to the neighbouring calendar year (e.g.
# 1 Jan may fall in week 52/53, 31 Dec in week 01). If bail_year is a calendar
# year, week_year labels around New Year may be off -- confirm how bail_year
# is defined.
dta.w[, week := strftime(as.Date(start_date_fa_seq), format = "%V")]

# Keep only the first row for each case number. The scratch column `dup` is
# kept in the table (all FALSE after filtering), as before.
dta.w[, dup := duplicated(case_number)]
dta.w <- dta.w[!dup, ]

# Keep only the first row per hybrid_id within a year-week.
dta.w[, week_year := paste(bail_year, week, sep = "-")]
dta.w[, dup := duplicated(paste(hybrid_id, week_year, sep = "-"))]
dta.w <- dta.w[!dup, ]

# Caseload per judge-week (within year) and per judge-year.
dta.w[, count_cases := .N, by = c("judge_cat", "week", "bail_year")]
dta.w[, count_cases_2 := .N, by = c("judge_cat", "bail_year")]

# ---- Weeks worked per judge, by year ----------------------------------------
# For each year, drop rows whose judge-week caseload (count_cases) is below
# that year's 5th percentile, then summarise how many distinct weeks each
# judge appears in.
for (y in 2009:2016) {

  # The threshold is computed from year-y rows only; the filter itself is
  # applied to the whole table and narrowed to year y just below.
  data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == y], probs = 0.05), ]

  # Week x judge contingency table for year y, built once and reused.
  in_year <- data$bail_year == y
  week_by_judge <- table(data$week_year[in_year], data$judge_cat[in_year])

  # A cell > 0 means the judge has at least one row in that week, so the
  # column sums count distinct weeks per judge.
  weeks_per_judge <- colSums(week_by_judge > 0)

  min.j <- min(weeks_per_judge)
  std.j <- sd(weeks_per_judge)
  ave.j <- mean(weeks_per_judge)
  med.j <- median(weeks_per_judge)
  max.j <- max(weeks_per_judge)
  num.j <- ncol(week_by_judge)          # number of judges in the table
  per.j <- mean(weeks_per_judge <= 2)   # share of judges with at most 2 weeks

  # Printed order: mean, sd, median, min, max, number of judges,
  # share of judges with <= 2 weeks.
  print(y)
  print(c(ave.j, std.j, med.j, min.j, max.j, num.j, per.j))
}

# ---- Cases per judge, by year -----------------------------------------------
# Same 5th-percentile filter as the weeks-per-judge summary, but here the
# statistics describe the number of rows (cases) each judge has in the year.
for (y in 2009:2016) {
  # The threshold is computed from year-y rows only; the filter itself is
  # applied to the whole table and narrowed to year y just below.
  data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == y], probs = 0.05), ]

  # Week x judge contingency table for year y, built once and reused.
  in_year <- data$bail_year == y
  week_by_judge <- table(data$week_year[in_year], data$judge_cat[in_year])

  # Summing each judge's column over all weeks gives that judge's row count.
  cases_per_judge <- colSums(week_by_judge)

  min.j <- min(cases_per_judge)
  std.j <- sd(cases_per_judge)
  ave.j <- mean(cases_per_judge)
  med.j <- median(cases_per_judge)
  max.j <- max(cases_per_judge)
  num.j <- ncol(week_by_judge)   # number of judges in the table

  # Printed order: mean, sd, median, min, max, number of judges.
  print(y)
  print(c(ave.j, std.j, med.j, min.j, max.j, num.j))
}
